Visualizing the 2016 General Election Polls


In [ ]:
from __future__ import print_function

import os
import subprocess
import sys

import numpy as np
import pandas as pd
from ipywidgets import VBox, HBox

# State lookup table; later cells read its 'FIPS', 'USPS' and 'Name' columns.
codes = pd.read_csv(os.path.abspath('../data_files/state_codes.csv'))

In [ ]:
# Make sure the Pollster client is importable; install the pinned release if it is not.
try:
    from pollster import Pollster
except ImportError:
    print('Pollster not found. Installing Pollster..')
    try:
        # `pip.main` is not a supported API and no longer exists in pip >= 10.
        # Running pip as a subprocess of the current interpreter installs the
        # package into the environment this kernel is actually using.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pollster==0.1.6'])
    except (subprocess.CalledProcessError, OSError):
        # Non-zero pip exit status, or the interpreter could not be launched.
        print("The pip installation failed. Please manually install Pollster and re-run this notebook.")

In [ ]:
def get_candidate_data(question):
    """Extract the headline percentages from one poll question.

    Only the first entry of ``question['subpopulations']`` is read.

    Parameters
    ----------
    question : dict
        Poll question holding a ``'subpopulations'`` list whose entries carry
        a ``'responses'`` list. Each response is read through its
        ``'last_name'``, ``'choice'`` and ``'value'`` keys.

    Returns
    -------
    tuple
        ``(clinton, trump, other, undecided)``. A category that does not occur
        in the responses is reported as ``0.``. ``other`` is the sum of every
        response that is neither Clinton, Trump nor Undecided.
    """
    clinton, trump, undecided, other = 0., 0., 0., 0.

    subpopulations = question['subpopulations']
    if not subpopulations:
        # No subpopulation recorded: nothing to read, report all zeros.
        return clinton, trump, other, undecided

    for candidate in subpopulations[0]['responses']:
        if candidate['last_name'] == 'Clinton':
            clinton = candidate['value']
        elif candidate['last_name'] == 'Trump':
            trump = candidate['value']
        elif candidate['choice'] == 'Undecided':
            undecided = candidate['value']
        else:
            # Several responses can land here (third-party candidates, a generic
            # "Other" choice, ...); add them up instead of keeping only the last.
            other += candidate['value'] or 0.
    return clinton, trump, other, undecided

def get_row(question, partisan='Nonpartisan', end_date='2016-06-21'):
    """Build the record for one 2016 presidential poll question.

    Parameters
    ----------
    question : dict
        Poll question; its ``'topic'``, ``'name'`` and ``'state'`` keys are
        read here and the whole mapping is passed to ``get_candidate_data``.
    partisan : str, optional
        Stored unchanged in the ``'Partisan'`` field.
    end_date : str, optional
        Converted with ``np.datetime64`` and stored in the ``'Date'`` field.

    Returns
    -------
    list of dict or None
        A one-element list holding the record when the topic contains both
        ``'2016'`` and ``'Presidential'``; ``None`` otherwise, including when
        the topic is ``None`` or empty.
    """
    # A question may carry no topic at all; treat that as "not a match"
    # instead of letting the substring tests raise TypeError on None.
    topic = question['topic'] or ''
    if '2016' not in topic or 'Presidential' not in topic:
        return None

    hillary, donald, other, undecided = get_candidate_data(question)
    return [{'Name': question['name'], 'Partisan': partisan, 'State': question['state'],
             'Date': np.datetime64(end_date), 'Trump': donald, 'Clinton': hillary, 'Other': other,
             'Undecided': undecided}]

def analyze_polls(polls):
    """Add one row per matching question in ``polls`` to the global ``data`` frame.

    Parameters
    ----------
    polls : iterable
        Poll objects exposing ``questions``, ``partisan`` and ``end_date``
        attributes. Every question is passed to ``get_row``; questions for
        which it returns ``None`` are skipped.

    Returns
    -------
    None
        The module-level ``data`` DataFrame is rebound to the extended frame.
    """
    global data

    # Gather all records first: DataFrame.append no longer exists in pandas 2.x,
    # and growing a frame one row at a time is quadratic anyway.
    rows = []
    for poll in polls:
        for question in poll.questions:
            resp = get_row(question, partisan=poll.partisan, end_date=poll.end_date)
            if resp is not None:
                rows.extend(resp)

    if not rows:
        return

    # Keep the column order of the existing frame.
    new_rows = pd.DataFrame(rows, columns=data.columns)
    if data.empty:
        # Avoid concatenating with an empty, all-object frame, which would
        # degrade the numeric/datetime dtypes of the new rows.
        data = new_rows
    else:
        data = pd.concat([data, new_rows], ignore_index=True)
    return

In [ ]:
# Only the import can tell us that Pollster is missing, so only the import is
# translated into the "please install" error. Any other failure (network, API
# response, bugs in the helper functions) surfaces with its own traceback
# instead of being mislabelled as a missing installation.
try:
    from pollster import Pollster
except ImportError:
    raise ValueError('Please install Pollster and run the functions above')

pollster = Pollster()

# Getting data from Pollster. This might take a second.
raw_data = pollster.charts(topic='2016-president')

# Start from an empty frame on every run of this cell; analyze_polls() extends
# this module-level `data` frame.
data = pd.DataFrame(columns=['Name', 'Partisan', 'State', 'Date', 'Trump', 'Clinton', 'Other',
                             'Undecided'])

for chart in raw_data:
    analyze_polls(chart.polls())

In [ ]:
def get_state_party(code):
    """Return the party leading the most recent usable poll of a state.

    ``code`` is matched against ``codes['FIPS']`` to find the state's USPS
    abbreviation. A poll is usable when both its 'Trump' and 'Clinton' values
    are positive. Returns ``'Republican'`` when Trump is strictly ahead in the
    latest usable poll (by 'Date'), ``'Democrat'`` otherwise (ties included),
    and ``None`` when the state has no polls or no usable ones.
    """
    usps = codes.loc[codes['FIPS'] == code, 'USPS'].values[0]

    in_state = data['State'] == usps
    if not in_state.any():
        return None

    both_positive = (data['Trump'] > 0.) & (data['Clinton'] > 0.)
    usable = data[in_state & both_positive].sort_values(by='Date')
    if len(usable) == 0:
        return None

    latest = usable.tail(1)
    trump_ahead = (latest['Trump'] > latest['Clinton']).values[0]
    return 'Republican' if trump_ahead else 'Democrat'

def get_color_data():
    """Map every FIPS code in ``codes`` to the result of ``get_state_party``."""
    return {fips: get_state_party(fips) for fips in codes['FIPS']}

def get_state_data(code):
    """Return a state's polls with positive Trump and Clinton values, by date.

    ``code`` is matched against ``codes['FIPS']`` to find the state's USPS
    abbreviation. Returns ``None`` when ``data`` holds no row for that state;
    otherwise a DataFrame sorted by 'Date', which may be empty when no row has
    both values positive.
    """
    usps = codes.loc[codes['FIPS'] == code, 'USPS'].values[0]

    in_state = data['State'] == usps
    if not in_state.any():
        return None

    both_positive = (data['Trump'] > 0.) & (data['Clinton'] > 0.)
    return data[in_state & both_positive].sort_values(by='Date')

In [ ]:
from bqplot import *
from ipywidgets import Layout

In [ ]:
# Scales of the per-state chart: dates on x, poll percentages on y.
dt_x = DateScale()
sc_y = LinearScale()

# A single mark drawing two lines. hover_callback supplies y as
# [Trump, Clinton], so the colours are listed in that order.
time_series = Lines(
    scales={'x': dt_x, 'y': sc_y},
    colors=['#E91D0E', '#2aa1ec'],
    marker='circle',
)

ax_x = Axis(scale=dt_x, label='Date')
ax_y = Axis(scale=sc_y, orientation='vertical', label='Percentage')

# This figure is later attached to the state map as its tooltip.
ts_fig = Figure(
    marks=[time_series],
    axes=[ax_x, ax_y],
    title='General Election - State Polls',
    layout=Layout(min_width='650px', min_height='400px'),
)

In [ ]:
# Map projection and a two-colour ordinal scale for the leading party.
sc_geo = AlbersUSA()
sc_c1 = OrdinalColorScale(
    domain=['Democrat', 'Republican'],
    colors=['#2aa1ec', '#E91D0E'],
)

# FIPS code -> 'Democrat' / 'Republican' / None, as computed by get_color_data().
color_data = get_color_data()

map_styles = {
    'color': color_data,
    'scales': {'projection': sc_geo, 'color': sc_c1},
    'colors': {'default_color': 'Grey'},
}

axis = ColorAxis(scale=sc_c1)

# ts_fig is passed as the tooltip of the state map.
states_map = Map(
    map_data=topo_load('map_data/USStatesMap.json'),
    tooltip=ts_fig,
    **map_styles
)
map_fig = Figure(
    marks=[states_map],
    axes=[axis],
    title='General Election Polls - State Wise',
)

In [ ]:
def hover_callback(name, value):
    """Refresh the tooltip chart with the polls of the hovered state.

    Parameters
    ----------
    name
        Unused; present because the hover handler is called with two arguments.
    value : dict
        Hover payload; ``value['data']['id']`` is used as the state's FIPS code.

    The figure title is set to the hovered state's name in every case, so a
    state without usable polls no longer shows the title of the state that was
    hovered before it. Without polls the series is blanked to ``[0.]``.
    """
    fips = value['data']['id']
    polls = get_state_data(fips)

    # get_state_data() has already resolved this FIPS code in `codes`, so the
    # name lookup below refers to a row that exists.
    state_name = str(codes[codes['FIPS'] == fips]['Name'].values[0])
    ts_fig.title = state_name + ' Polls - Presidential Election'

    if polls is None or polls.shape[0] == 0:
        time_series.y = [0.]
        return

    # y order is [Trump, Clinton], matching the colour order of `time_series`.
    time_series.x, time_series.y = (polls['Date'].values.astype(np.datetime64),
                                    [polls['Trump'].values, polls['Clinton'].values])

In [ ]:
# Register hover_callback as the hover handler of the state map.
states_map.on_hover(hover_callback)

In [ ]:
# National polls ('US' rows) in which both candidates have a positive share,
# ordered by date.
both_positive = (data['Trump'] > 0.) & (data['Clinton'] > 0.)
national = data[(data['State'] == 'US') & both_positive].sort_values(by='Date')

# Fresh scales for the national figure (these names are rebound here; the
# state chart keeps the scale objects it was built with).
dt_x = DateScale()
sc_y = LinearScale()

# Both scatters share the same dates on the x axis.
national_dates = national['Date'].values.astype(np.datetime64)

clinton_scatter = Scatter(
    x=national_dates,
    y=national['Clinton'],
    scales={'x': dt_x, 'y': sc_y},
    colors=['#2aa1ec'],
)

trump_scatter = Scatter(
    x=national_dates,
    y=national['Trump'],
    scales={'x': dt_x, 'y': sc_y},
    colors=['#E91D0E'],
)

ax_x = Axis(scale=dt_x, label='Date', tick_format='%b-%Y', num_ticks=8)
ax_y = Axis(scale=sc_y, orientation='vertical', label='Percentage')

scat_fig = Figure(
    marks=[clinton_scatter, trump_scatter],
    axes=[ax_x, ax_y],
    title='General Election - National Polls',
)

Hover on the map to visualize the poll data for that state.


In [ ]:
# Show the state map with the national scatter plot stacked below it.
VBox([map_fig, scat_fig])

Visualizing the County Results of the 2008 Elections


In [ ]:
# County-level 2008 results; later cells read its 'FIPS', 'Obama', 'McCain',
# 'Obama %' and 'McCain %' columns.
county_data = pd.read_csv(os.path.abspath('../data_files/2008-election-results.csv'))

In [ ]:
# Start with 'McCain' for every county; a later cell overwrites the rows that
# Obama won.
winner = np.array(['McCain'] * county_data.shape[0])

In [ ]:
# Mark the counties where Obama's value is strictly larger than McCain's;
# ties keep the 'McCain' default.
winner[(county_data['Obama'] > county_data['McCain']).values] = 'Obama'

In [ ]:
# Projection and winner colour scale for the county map.
sc_geo_county = AlbersUSA()
sc_c1_county = OrdinalColorScale(
    domain=['McCain', 'Obama'],
    colors=['Red', 'DeepSkyBlue'],
)

# Integer FIPS code -> winning candidate, pairing each row of county_data
# with the matching entry of `winner`.
color_data_county = dict(zip(county_data['FIPS'].values.astype(int), list(winner)))

map_styles_county = {
    'color': color_data_county,
    'scales': {'projection': sc_geo_county, 'color': sc_c1_county},
    'colors': {'default_color': 'Grey'},
}

axis_county = ColorAxis(scale=sc_c1_county)

county_map = Map(
    map_data=topo_load('map_data/USCountiesMap.json'),
    **map_styles_county
)
county_fig = Figure(
    marks=[county_map],
    axes=[axis_county],
    title='US Elections 2008 - Example',
    layout=Layout(min_width='800px', min_height='550px'),
)

In [ ]:
# Scales of the tooltip bar chart: one bar per candidate, y fixed to 0-100.
names_sc = OrdinalScale(domain=['Obama', 'McCain'])
vote_sc_y = LinearScale(min=0, max=100.)

names_ax = Axis(scale=names_sc, label='Candidate')
vote_ax = Axis(scale=vote_sc_y, orientation='vertical', label='Percentage')

# The bars start without data; county_hover fills in x and y.
vote_bars = Bars(
    scales={'x': names_sc, 'y': vote_sc_y},
    colors=['#2aa1ec', '#E91D0E'],
)

bar_fig = Figure(
    marks=[vote_bars],
    axes=[names_ax, vote_ax],
    title='Vote Margin',
    layout=Layout(min_width='600px', min_height='400px'),
)

In [ ]:
def county_hover(name, value):
    """Show the Obama / McCain vote percentages of the hovered county.

    ``value['data']['id']`` is compared against ``county_data['FIPS']``. When
    no row matches, the bar chart title is cleared and both bars are set to
    zero. Otherwise the first matching row provides the 'Obama %' and
    'McCain %' values, and the title is built from ``value['data']['name']``.
    ``name`` is not used.
    """
    fips = value['data']['id']
    is_match = county_data['FIPS'] == fips

    if is_match.sum() == 0:
        bar_fig.title = ''
        vote_bars.y = [0., 0.]
        return

    matched = county_data[is_match]
    obama_pct = float(matched['Obama %'].values[0])
    mccain_pct = float(matched['McCain %'].values[0])

    vote_bars.x, vote_bars.y = ['Obama', 'McCain'], [obama_pct, mccain_pct]
    bar_fig.title = 'Vote % - ' + value['data']['name']
    
# Register the hover handler and use the bar chart as the county map's tooltip.
county_map.on_hover(county_hover)
county_map.tooltip = bar_fig

Hover on the map to visualize the voting percentage for each candidate in that county.


In [ ]:
# Display the county map figure as the cell output.
county_fig